/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2024, SiFive, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// 6. Configuration-Setting and Utility Functions
//
// Builders for RVV type names and the vsetvl intrinsic name. Every public
// macro forwards to a `_`-suffixed helper that does the actual `##` pasting;
// the extra level ensures arguments that are themselves macros are expanded
// before they are pasted.
//   RVV_TYPE_B(8)           -> vbool8_t
//   RVV_TYPE_U(32, m1)      -> vuint32m1_t
//   RVV_TYPE_F(32, m1)      -> vfloat32m1_t
//   RVV_TYPE_FX(32, m1, 2)  -> vfloat32m1x2_t
//   VSETVL(32, m1)          -> __riscv_vsetvl_e32m1
#define RVV_TYPE_B_(RATIO)            vbool##RATIO##_t
#define RVV_TYPE_B(RATIO)             RVV_TYPE_B_(RATIO)
#define RVV_TYPE_U_(PREC, LMUL)       vuint##PREC##LMUL##_t
#define RVV_TYPE_U(PREC, LMUL)        RVV_TYPE_U_(PREC, LMUL)
#define RVV_TYPE_F_(PREC, LMUL)       vfloat##PREC##LMUL##_t
#define RVV_TYPE_F(PREC, LMUL)        RVV_TYPE_F_(PREC, LMUL)
#define RVV_TYPE_FX_(PREC, LMUL, NF)  vfloat##PREC##LMUL##x##NF##_t
#define RVV_TYPE_FX(PREC, LMUL, NF)   RVV_TYPE_FX_(PREC, LMUL, NF)
#define VSETVL_(PREC, LMUL)           __riscv_vsetvl_e##PREC##LMUL
#define VSETVL(PREC, LMUL)            VSETVL_(PREC, LMUL)

// 7. Vector Loads and Stores
// Loads
//
// Name builders for float load intrinsics: unit-stride (VLE), strided (VLSE),
// two-field segment (VLSEG2) and strided two-field segment (VLSSEG2). The
// *_TU variants append the `_tu` suffix to the same name. Each public macro
// defers to its `_` helper so macro arguments are expanded before pasting.
//   VLE_V_F(32, m1)           -> __riscv_vle32_v_f32m1
//   VLSSEG2_V_F_TU(64, m2, 2) -> __riscv_vlsseg2e64_v_f64m2x2_tu
#define VLE_V_F_(PREC, LMUL)              __riscv_vle##PREC##_v_f##PREC##LMUL
#define VLE_V_F(PREC, LMUL)               VLE_V_F_(PREC, LMUL)
#define VLSE_V_F_(PREC, LMUL)             __riscv_vlse##PREC##_v_f##PREC##LMUL
#define VLSE_V_F(PREC, LMUL)              VLSE_V_F_(PREC, LMUL)
#define VLSEG2_V_F_(PREC, LMUL, NF)       __riscv_vlseg2e##PREC##_v_f##PREC##LMUL##x##NF
#define VLSEG2_V_F(PREC, LMUL, NF)        VLSEG2_V_F_(PREC, LMUL, NF)
#define VLSSEG2_V_F_(PREC, LMUL, NF)      __riscv_vlsseg2e##PREC##_v_f##PREC##LMUL##x##NF
#define VLSSEG2_V_F(PREC, LMUL, NF)       VLSSEG2_V_F_(PREC, LMUL, NF)

// Tail-undisturbed (`_tu`) forms of the loads above.
#define VLE_V_F_TU_(PREC, LMUL)           __riscv_vle##PREC##_v_f##PREC##LMUL##_tu
#define VLE_V_F_TU(PREC, LMUL)            VLE_V_F_TU_(PREC, LMUL)
#define VLSE_V_F_TU_(PREC, LMUL)          __riscv_vlse##PREC##_v_f##PREC##LMUL##_tu
#define VLSE_V_F_TU(PREC, LMUL)           VLSE_V_F_TU_(PREC, LMUL)
#define VLSEG2_V_F_TU_(PREC, LMUL, NF)    __riscv_vlseg2e##PREC##_v_f##PREC##LMUL##x##NF##_tu
#define VLSEG2_V_F_TU(PREC, LMUL, NF)     VLSEG2_V_F_TU_(PREC, LMUL, NF)
#define VLSSEG2_V_F_TU_(PREC, LMUL, NF)   __riscv_vlsseg2e##PREC##_v_f##PREC##LMUL##x##NF##_tu
#define VLSSEG2_V_F_TU(PREC, LMUL, NF)    VLSSEG2_V_F_TU_(PREC, LMUL, NF)
// Stores
//
// Name builders for float store intrinsics: unit-stride (VSE), strided
// (VSSE), two-field segment (VSSEG2) and strided segment stores with 2..8
// fields (VSSSEG2 .. VSSSEG8). The segment count in the instruction mnemonic
// is fixed by the macro name, while the tuple-type suffix comes from NF.
// NOTE(review): presumably NF is expected to equal the count in the macro
// name -- confirm against callers.
//   VSE_V_F(32, m1)        -> __riscv_vse32_v_f32m1
//   VSSSEG3_V_F(32, m1, 3) -> __riscv_vssseg3e32_v_f32m1x3
#define VSE_V_F_(PREC, LMUL)           __riscv_vse##PREC##_v_f##PREC##LMUL
#define VSE_V_F(PREC, LMUL)            VSE_V_F_(PREC, LMUL)
#define VSSE_V_F_(PREC, LMUL)          __riscv_vsse##PREC##_v_f##PREC##LMUL
#define VSSE_V_F(PREC, LMUL)           VSSE_V_F_(PREC, LMUL)
#define VSSEG2_V_F_(PREC, LMUL, NF)    __riscv_vsseg2e##PREC##_v_f##PREC##LMUL##x##NF
#define VSSEG2_V_F(PREC, LMUL, NF)     VSSEG2_V_F_(PREC, LMUL, NF)

// Strided segment stores, one macro pair per field count.
#define VSSSEG2_V_F_(PREC, LMUL, NF)   __riscv_vssseg2e##PREC##_v_f##PREC##LMUL##x##NF
#define VSSSEG2_V_F(PREC, LMUL, NF)    VSSSEG2_V_F_(PREC, LMUL, NF)
#define VSSSEG3_V_F_(PREC, LMUL, NF)   __riscv_vssseg3e##PREC##_v_f##PREC##LMUL##x##NF
#define VSSSEG3_V_F(PREC, LMUL, NF)    VSSSEG3_V_F_(PREC, LMUL, NF)
#define VSSSEG4_V_F_(PREC, LMUL, NF)   __riscv_vssseg4e##PREC##_v_f##PREC##LMUL##x##NF
#define VSSSEG4_V_F(PREC, LMUL, NF)    VSSSEG4_V_F_(PREC, LMUL, NF)
#define VSSSEG5_V_F_(PREC, LMUL, NF)   __riscv_vssseg5e##PREC##_v_f##PREC##LMUL##x##NF
#define VSSSEG5_V_F(PREC, LMUL, NF)    VSSSEG5_V_F_(PREC, LMUL, NF)
#define VSSSEG6_V_F_(PREC, LMUL, NF)   __riscv_vssseg6e##PREC##_v_f##PREC##LMUL##x##NF
#define VSSSEG6_V_F(PREC, LMUL, NF)    VSSSEG6_V_F_(PREC, LMUL, NF)
#define VSSSEG7_V_F_(PREC, LMUL, NF)   __riscv_vssseg7e##PREC##_v_f##PREC##LMUL##x##NF
#define VSSSEG7_V_F(PREC, LMUL, NF)    VSSSEG7_V_F_(PREC, LMUL, NF)
#define VSSSEG8_V_F_(PREC, LMUL, NF)   __riscv_vssseg8e##PREC##_v_f##PREC##LMUL##x##NF
#define VSSSEG8_V_F(PREC, LMUL, NF)    VSSSEG8_V_F_(PREC, LMUL, NF)

// 11. Vector Integer Arithmetic Operations
//
// Name builders for unsigned-integer intrinsics; the `_` helper performs the
// pasting after the public macro has expanded its arguments.
//   VADD_VX_U(32, m1)       -> __riscv_vadd_vx_u32m1
//   VMERGE_VVM_TU_U(32, m1) -> __riscv_vmerge_vvm_u32m1_tu
#define VADD_VX_U_(PREC, LMUL)         __riscv_vadd_vx_u##PREC##LMUL
#define VADD_VX_U(PREC, LMUL)          VADD_VX_U_(PREC, LMUL)
#define VMERGE_VVM_TU_U_(PREC, LMUL)   __riscv_vmerge_vvm_u##PREC##LMUL##_tu
#define VMERGE_VVM_TU_U(PREC, LMUL)    VMERGE_VVM_TU_U_(PREC, LMUL)

// 13. Vector Floating-Point Operations
//
// Name builders for floating-point intrinsics. Each public macro hands its
// (already expanded) arguments to a `_` helper that pastes them into the
// intrinsic name, e.g.
//   VFADD_VV(32, m1)     -> __riscv_vfadd_vv_f32m1
//   VMFEQ_VV(32, m1, 32) -> __riscv_vmfeq_vv_f32m1_b32

// Arithmetic: add, subtract, multiply, divide.
#define VFADD_VV_(PREC, LMUL)          __riscv_vfadd_vv_f##PREC##LMUL
#define VFADD_VV(PREC, LMUL)           VFADD_VV_(PREC, LMUL)
#define VFSUB_VV_(PREC, LMUL)          __riscv_vfsub_vv_f##PREC##LMUL
#define VFSUB_VV(PREC, LMUL)           VFSUB_VV_(PREC, LMUL)
#define VFMUL_VV_(PREC, LMUL)          __riscv_vfmul_vv_f##PREC##LMUL
#define VFMUL_VV(PREC, LMUL)           VFMUL_VV_(PREC, LMUL)
#define VFMUL_VF_(PREC, LMUL)          __riscv_vfmul_vf_f##PREC##LMUL
#define VFMUL_VF(PREC, LMUL)           VFMUL_VF_(PREC, LMUL)
#define VFDIV_VV_(PREC, LMUL)          __riscv_vfdiv_vv_f##PREC##LMUL
#define VFDIV_VV(PREC, LMUL)           VFDIV_VV_(PREC, LMUL)
#define VFRDIV_VF_(PREC, LMUL)         __riscv_vfrdiv_vf_f##PREC##LMUL
#define VFRDIV_VF(PREC, LMUL)          VFRDIV_VF_(PREC, LMUL)

// Fused multiply-add family.
#define VFMACC_VV_(PREC, LMUL)         __riscv_vfmacc_vv_f##PREC##LMUL
#define VFMACC_VV(PREC, LMUL)          VFMACC_VV_(PREC, LMUL)
#define VFMACC_VF_(PREC, LMUL)         __riscv_vfmacc_vf_f##PREC##LMUL
#define VFMACC_VF(PREC, LMUL)          VFMACC_VF_(PREC, LMUL)
#define VFMSAC_VF_(PREC, LMUL)         __riscv_vfmsac_vf_f##PREC##LMUL
#define VFMSAC_VF(PREC, LMUL)          VFMSAC_VF_(PREC, LMUL)
#define VFNMSAC_VV_(PREC, LMUL)        __riscv_vfnmsac_vv_f##PREC##LMUL
#define VFNMSAC_VV(PREC, LMUL)         VFNMSAC_VV_(PREC, LMUL)
#define VFNMSAC_VF_(PREC, LMUL)        __riscv_vfnmsac_vf_f##PREC##LMUL
#define VFNMSAC_VF(PREC, LMUL)         VFNMSAC_VF_(PREC, LMUL)
#define VFMADD_VF_(PREC, LMUL)         __riscv_vfmadd_vf_f##PREC##LMUL
#define VFMADD_VF(PREC, LMUL)          VFMADD_VF_(PREC, LMUL)
#define VFMSUB_VF_(PREC, LMUL)         __riscv_vfmsub_vf_f##PREC##LMUL
#define VFMSUB_VF(PREC, LMUL)          VFMSUB_VF_(PREC, LMUL)

// Max, negate, absolute value. Note that VFNEG_VF, despite its `_VF` suffix,
// produces the `vfneg_v` intrinsic name.
#define VFMAX_VV_(PREC, LMUL)          __riscv_vfmax_vv_f##PREC##LMUL
#define VFMAX_VV(PREC, LMUL)           VFMAX_VV_(PREC, LMUL)
#define VFNEG_VF_(PREC, LMUL)          __riscv_vfneg_v_f##PREC##LMUL
#define VFNEG_VF(PREC, LMUL)           VFNEG_VF_(PREC, LMUL)
#define VFABS_V_(PREC, LMUL)           __riscv_vfabs_v_f##PREC##LMUL
#define VFABS_V(PREC, LMUL)            VFABS_V_(PREC, LMUL)

// Comparisons; RATIO is pasted into the `_b<RATIO>` suffix of the name.
#define VMFEQ_VV_(PREC, LMUL, RATIO)   __riscv_vmfeq_vv_f##PREC##LMUL##_b##RATIO
#define VMFEQ_VV(PREC, LMUL, RATIO)    VMFEQ_VV_(PREC, LMUL, RATIO)
#define VMFNE_VV_(PREC, LMUL, RATIO)   __riscv_vmfne_vv_f##PREC##LMUL##_b##RATIO
#define VMFNE_VV(PREC, LMUL, RATIO)    VMFNE_VV_(PREC, LMUL, RATIO)
#define VMFGT_VV_(PREC, LMUL, RATIO)   __riscv_vmfgt_vv_f##PREC##LMUL##_b##RATIO
#define VMFGT_VV(PREC, LMUL, RATIO)    VMFGT_VV_(PREC, LMUL, RATIO)
#define VMFGE_VV_(PREC, LMUL, RATIO)   __riscv_vmfge_vv_f##PREC##LMUL##_b##RATIO
#define VMFGE_VV(PREC, LMUL, RATIO)    VMFGE_VV_(PREC, LMUL, RATIO)

// Merge.
#define VMERGE_VVM_F_(PREC, LMUL)      __riscv_vmerge_vvm_f##PREC##LMUL
#define VMERGE_VVM_F(PREC, LMUL)       VMERGE_VVM_F_(PREC, LMUL)
// VFMV_V_V(PRECISION, LMUL) is meant to be followed by an argument list with
// at least two arguments, e.g. VFMV_V_V(32, m1)(src, vl).
//
// Unlike the other macros in this file it does not expand to a single
// intrinsic name. The helper's replacement text is deliberately left with two
// unclosed parentheses and ends in CURRY_1ARG:
//
//   VREINTERPRET_V_I_F(..)( __riscv_vmv_v_v_i<PRECISION><LMUL>(
//       VREINTERPRET_V_F_I(..) CURRY_1ARG
//
// CURRY_1ARG (defined at the end of this file) then swallows the caller's
// argument list: it parenthesizes the first argument so that it becomes the
// operand of the float->int reinterpret, passes the remaining arguments on to
// the integer vmv_v_v call, and supplies the two closing parentheses. The net
// expansion of VFMV_V_V(32, m1)(src, vl) is therefore
//
//   __riscv_vreinterpret_v_i32m1_f32m1(
//       __riscv_vmv_v_v_i32m1( __riscv_vreinterpret_v_f32m1_i32m1(src), vl))
//
// The definition order relative to VREINTERPRET_* and CURRY_1ARG does not
// matter, because macros are only expanded at the point of use.
// NOTE(review): the reason the copy is routed through the integer vmv_v_v
// intrinsic rather than a float one is not visible in this file.
#define VFMV_V_V_(PRECISION, LMUL) VREINTERPRET_V_I_F(PRECISION, LMUL)(  __riscv_vmv_v_v_i##PRECISION##LMUL( VREINTERPRET_V_F_I(PRECISION, LMUL) CURRY_1ARG
#define VFMV_V_V(PRECISION, LMUL) VFMV_V_V_(PRECISION, LMUL)
// VFMV_V_F(32, m1) -> __riscv_vfmv_v_f_f32m1
// The public macro expands its arguments, then the `_` helper pastes them.
#define VFMV_V_F_(PREC, LMUL)   __riscv_vfmv_v_f_f##PREC##LMUL
#define VFMV_V_F(PREC, LMUL)    VFMV_V_F_(PREC, LMUL)

// Tail-undisturbed floating-point name builders: the same pasting scheme as
// the plain floating-point macros, with `_tu` appended to the intrinsic name.
//   VFMACC_VV_TU(32, m1) -> __riscv_vfmacc_vv_f32m1_tu
// VFNEG_VF_TU, despite its `_VF` infix, produces the `vfneg_v` name.
#define VFMUL_VF_TU_(PREC, LMUL)      __riscv_vfmul_vf_f##PREC##LMUL##_tu
#define VFMUL_VF_TU(PREC, LMUL)       VFMUL_VF_TU_(PREC, LMUL)
#define VFMACC_VV_TU_(PREC, LMUL)     __riscv_vfmacc_vv_f##PREC##LMUL##_tu
#define VFMACC_VV_TU(PREC, LMUL)      VFMACC_VV_TU_(PREC, LMUL)
#define VFMACC_VF_TU_(PREC, LMUL)     __riscv_vfmacc_vf_f##PREC##LMUL##_tu
#define VFMACC_VF_TU(PREC, LMUL)      VFMACC_VF_TU_(PREC, LMUL)
#define VFMSAC_VF_TU_(PREC, LMUL)     __riscv_vfmsac_vf_f##PREC##LMUL##_tu
#define VFMSAC_VF_TU(PREC, LMUL)      VFMSAC_VF_TU_(PREC, LMUL)
#define VFNMSAC_VV_TU_(PREC, LMUL)    __riscv_vfnmsac_vv_f##PREC##LMUL##_tu
#define VFNMSAC_VV_TU(PREC, LMUL)     VFNMSAC_VV_TU_(PREC, LMUL)
#define VFNMSAC_VF_TU_(PREC, LMUL)    __riscv_vfnmsac_vf_f##PREC##LMUL##_tu
#define VFNMSAC_VF_TU(PREC, LMUL)     VFNMSAC_VF_TU_(PREC, LMUL)
#define VFMAX_VV_TU_(PREC, LMUL)      __riscv_vfmax_vv_f##PREC##LMUL##_tu
#define VFMAX_VV_TU(PREC, LMUL)       VFMAX_VV_TU_(PREC, LMUL)
#define VFNEG_VF_TU_(PREC, LMUL)      __riscv_vfneg_v_f##PREC##LMUL##_tu
#define VFNEG_VF_TU(PREC, LMUL)       VFNEG_VF_TU_(PREC, LMUL)

// 14. Vector Reduction Operations
//
// Name builders for reduction intrinsics. The source vector type uses the
// given LMUL, while the second type suffix in the name is hard-coded to m1.
//   VREDMINU_VS_M(32, m4) -> __riscv_vredminu_vs_u32m4_u32m1_m
//   VF_REDUSUM_VS(32, m4) -> __riscv_vfredusum_vs_f32m4_f32m1
//   VFREDMAX_VS(32, m4)   -> __riscv_vfredmax_vs_f32m4_f32m1
#define VREDMINU_VS_M_(PREC, LMUL)   __riscv_vredminu_vs_u##PREC##LMUL##_u##PREC##m1_m
#define VREDMINU_VS_M(PREC, LMUL)    VREDMINU_VS_M_(PREC, LMUL)
#define VF_REDUSUM_VS_(PREC, LMUL)   __riscv_vfredusum_vs_f##PREC##LMUL##_f##PREC##m1
#define VF_REDUSUM_VS(PREC, LMUL)    VF_REDUSUM_VS_(PREC, LMUL)
#define VFREDMAX_VS_(PREC, LMUL)     __riscv_vfredmax_vs_f##PREC##LMUL##_f##PREC##m1
#define VFREDMAX_VS(PREC, LMUL)      VFREDMAX_VS_(PREC, LMUL)

// 15. Vector Mask Operations
//
// Name builders for mask-related intrinsics.
//   VFIRST_M(32)   -> __riscv_vfirst_m_b32
//   VID_V(32, m1)  -> __riscv_vid_v_u32m1
#define VFIRST_M_(RATIO)       __riscv_vfirst_m_b##RATIO
#define VFIRST_M(RATIO)        VFIRST_M_(RATIO)
#define VID_V_(PREC, LMUL)     __riscv_vid_v_u##PREC##LMUL
#define VID_V(PREC, LMUL)      VID_V_(PREC, LMUL)

// 16. Vector Permutation Operations
//
// Name builders for scalar<->vector moves and gather. VMV_X_S_U and VFMV_F_S
// take only the element width; the vector type in their names is fixed to m1.
//   VMV_X_S_U(32)         -> __riscv_vmv_x_s_u32m1_u32
//   VMV_S_X_U(32, m1)     -> __riscv_vmv_s_x_u32m1
//   VFMV_F_S(32)          -> __riscv_vfmv_f_s_f32m1_f32
//   VFMV_S_F(32, m1)      -> __riscv_vfmv_s_f_f32m1
//   VRGATHER_VX_F(32, m1) -> __riscv_vrgather_vx_f32m1
#define VMV_X_S_U_(PREC)              __riscv_vmv_x_s_u##PREC##m1_u##PREC
#define VMV_X_S_U(PREC)               VMV_X_S_U_(PREC)
#define VMV_S_X_U_(PREC, LMUL)        __riscv_vmv_s_x_u##PREC##LMUL
#define VMV_S_X_U(PREC, LMUL)         VMV_S_X_U_(PREC, LMUL)
#define VFMV_F_S_(PREC)               __riscv_vfmv_f_s_f##PREC##m1_f##PREC
#define VFMV_F_S(PREC)                VFMV_F_S_(PREC)
#define VFMV_S_F_(PREC, LMUL)         __riscv_vfmv_s_f_f##PREC##LMUL
#define VFMV_S_F(PREC, LMUL)          VFMV_S_F_(PREC, LMUL)
#define VRGATHER_VX_F_(PREC, LMUL)    __riscv_vrgather_vx_f##PREC##LMUL
#define VRGATHER_VX_F(PREC, LMUL)     VRGATHER_VX_F_(PREC, LMUL)

// Miscellaneous Vector Functions
//
// Name builders for reinterpret, LMUL extension, and tuple helpers.
//   VREINTERPRET_V_I_F(32, m1) -> __riscv_vreinterpret_v_i32m1_f32m1
//   VREINTERPRET_V_F_I(32, m1) -> __riscv_vreinterpret_v_f32m1_i32m1
//   VLMUL_EXT_V_F_M1(32, m4)   -> __riscv_vlmul_ext_v_f32m1_f32m4
//   VUNDEFINED_FX(32, m1, 2)   -> __riscv_vundefined_f32m1x2
//   VSET_V_F(32, m1, 2)        -> __riscv_vset_v_f32m1_f32m1x2
//   VGET_V_F(32, m1, 2)        -> __riscv_vget_v_f32m1x2_f32m1
//   VCREATE_V_FX(32, m1, 2)    -> __riscv_vcreate_v_f32m1x2

// Reinterpret between same-width integer and float vectors.
#define VREINTERPRET_V_I_F_(PREC, LMUL)   __riscv_vreinterpret_v_i##PREC##LMUL##_f##PREC##LMUL
#define VREINTERPRET_V_I_F(PREC, LMUL)    VREINTERPRET_V_I_F_(PREC, LMUL)
#define VREINTERPRET_V_F_I_(PREC, LMUL)   __riscv_vreinterpret_v_f##PREC##LMUL##_i##PREC##LMUL
#define VREINTERPRET_V_F_I(PREC, LMUL)    VREINTERPRET_V_F_I_(PREC, LMUL)

// Extend from an m1 float vector to the given LMUL.
#define VLMUL_EXT_V_F_M1_(PREC, LMUL)     __riscv_vlmul_ext_v_f##PREC##m1##_f##PREC##LMUL
#define VLMUL_EXT_V_F_M1(PREC, LMUL)      VLMUL_EXT_V_F_M1_(PREC, LMUL)

// Tuple (`x<NF>`) helpers.
#define VUNDEFINED_FX_(PREC, LMUL, NF)    __riscv_vundefined_f##PREC##LMUL##x##NF
#define VUNDEFINED_FX(PREC, LMUL, NF)     VUNDEFINED_FX_(PREC, LMUL, NF)
#define VSET_V_F_(PREC, LMUL, NF)         __riscv_vset_v_f##PREC##LMUL##_f##PREC##LMUL##x##NF
#define VSET_V_F(PREC, LMUL, NF)          VSET_V_F_(PREC, LMUL, NF)
#define VGET_V_F_(PREC, LMUL, NF)         __riscv_vget_v_f##PREC##LMUL##x##NF##_f##PREC##LMUL
#define VGET_V_F(PREC, LMUL, NF)          VGET_V_F_(PREC, LMUL, NF)
#define VCREATE_V_FX_(PREC, LMUL, NF)     __riscv_vcreate_v_f##PREC##LMUL##x##NF
#define VCREATE_V_FX(PREC, LMUL, NF)      VCREATE_V_FX_(PREC, LMUL, NF)

// Non-vector functions
//
// CURRY_1ARG(first, rest...) expands to `(first), rest))`: the first argument
// wrapped in parentheses, a comma, the remaining arguments, and then TWO
// closing parentheses. It is intended to terminate a macro expansion that
// left two parentheses open (see VFMV_V_V_ in this file), so the text before
// it must already contain the matching `(` `(`. At least one argument after
// `first` is needed; otherwise the expansion has a dangling comma.
#define CURRY_1ARG(first, ...) (first), __VA_ARGS__))
